Contents:

Load necessary packages

Load metabolomics data and clinical information

The data we will use were collected from individuals with and without Down syndrome. The samples are from blood plasma, and metabolomics data were generated for each individual.

# The built-in iris dataset serves as a small illustrative example.
data("iris")
# Keep only the first occurrence of each row (drops exact duplicate rows).
iris <- iris[!duplicated(iris), , drop = FALSE]
# Matrix of the four measurement columns (everything except Species).
iris.mat <- as.matrix(
  iris[, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")]
)
dim(iris)
## [1] 149   5
colnames(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
# Read in the raw metabolomics data. The CSV text is downloaded first and then
# parsed; the first CSV column supplies the row names (the sample IDs).
x <- getURL("https://raw.githubusercontent.com/DS3-2024/Visualize_Cluster_HTP/main/data/P4C_LCMS_abundance_wide_011722.csv")
# Spell out TRUE: the shorthand T is an ordinary variable that can be reassigned.
mets <- read.csv(text = x, row.names = 1, header = TRUE)
dim(mets)
## [1] 419 174
row.names(mets)
##   [1] "HTP0001B2" "HTP0005A3" "HTP0012A2" "HTP0015A4" "HTP0017A4" "HTP0018B3"
##   [7] "HTP0019B2" "HTP0022B2" "HTP0023A2" "HTP0025A3" "HTP0026B2" "HTP0034B3"
##  [13] "HTP0035B3" "HTP0036A2" "HTP0040B3" "HTP0042B2" "HTP0043B2" "HTP0044B2"
##  [19] "HTP0047B2" "HTP0048B2" "HTP0050A3" "HTP0051B2" "HTP0052A2" "HTP0053A2"
##  [25] "HTP0054A2" "HTP0055B2" "HTP0057B2" "HTP0058B2" "HTP0059B2" "HTP0061B2"
##  [31] "HTP0062B2" "HTP0066A2" "HTP0067B2" "HTP0070A3" "HTP0073A2" "HTP0077B2"
##  [37] "HTP0078B2" "HTP0079B2" "HTP0080A2" "HTP0084A2" "HTP0085B2" "HTP0087B2"
##  [43] "HTP0090B2" "HTP0092B2" "HTP0095B2" "HTP0096B2" "HTP0098B2" "HTP0100B2"
##  [49] "HTP0101A2" "HTP0103A2" "HTP0104B2" "HTP0105B3" "HTP0107A3" "HTP0109A3"
##  [55] "HTP0111B2" "HTP0112A3" "HTP0116A2" "HTP0118B"  "HTP0119A"  "HTP0123A" 
##  [61] "HTP0125B"  "HTP0126B2" "HTP0127B2" "HTP0146B"  "HTP0147B"  "HTP0148B2"
##  [67] "HTP0149B"  "HTP0151A"  "HTP0198A3" "HTP0202B"  "HTP0204A"  "HTP0205B" 
##  [73] "HTP0206B"  "HTP0207A4" "HTP0208B"  "HTP0211A3" "HTP0213A3" "HTP0214B2"
##  [79] "HTP0216B2" "HTP0219A2" "HTP0226A3" "HTP0230A2" "HTP0236A2" "HTP0238A3"
##  [85] "HTP0241A2" "HTP0242B3" "HTP0243A3" "HTP0247A2" "HTP0249A2" "HTP0251A2"
##  [91] "HTP0254A2" "HTP0259A2" "HTP0261A2" "HTP0262A2" "HTP0263A2" "HTP0277A3"
##  [97] "HTP0287B"  "HTP0288B"  "HTP0289B"  "HTP0290B"  "HTP0293A"  "HTP0294B" 
## [103] "HTP0295A"  "HTP0296A"  "HTP0298A"  "HTP0300A"  "HTP0301A3" "HTP0303A" 
## [109] "HTP0306A"  "HTP0307B"  "HTP0310A"  "HTP0312A2" "HTP0314B"  "HTP0315A" 
## [115] "HTP0316B"  "HTP0318A3" "HTP0319A"  "HTP0320A2" "HTP0321A"  "HTP0322B" 
## [121] "HTP0323A"  "HTP0324A2" "HTP0325A"  "HTP0326A"  "HTP0327A"  "HTP0328A" 
## [127] "HTP0329A2" "HTP0330A"  "HTP0331A"  "HTP0332A"  "HTP0333A2" "HTP0334A2"
## [133] "HTP0335A2" "HTP0336A"  "HTP0337A"  "HTP0338B"  "HTP0339A"  "HTP0340A" 
## [139] "HTP0341A"  "HTP0342A"  "HTP0343B"  "HTP0345A"  "HTP0346B"  "HTP0347B" 
## [145] "HTP0348A"  "HTP0349B"  "HTP0351A"  "HTP0352B"  "HTP0354A"  "HTP0355B" 
## [151] "HTP0357A"  "HTP0361B2" "HTP0364A"  "HTP0367A"  "HTP0368A"  "HTP0370A" 
## [157] "HTP0373A"  "HTP0374A"  "HTP0375A"  "HTP0376B"  "HTP0378A"  "HTP0379A" 
## [163] "HTP0380A"  "HTP0381A"  "HTP0382A"  "HTP0384A"  "HTP0385A"  "HTP0386A" 
## [169] "HTP0387A"  "HTP0388A"  "HTP0389A"  "HTP0390A"  "HTP0391A"  "HTP0392A" 
## [175] "HTP0393A"  "HTP0394A"  "HTP0395A"  "HTP0396A"  "HTP0397A"  "HTP0398A" 
## [181] "HTP0399A2" "HTP0400A"  "HTP0401A"  "HTP0402A"  "HTP0403A"  "HTP0404A" 
## [187] "HTP0405A"  "HTP0406A"  "HTP0407A"  "HTP0408A"  "HTP0409A"  "HTP0410A" 
## [193] "HTP0411A"  "HTP0412A"  "HTP0413A"  "HTP0414A"  "HTP0415A"  "HTP0418A" 
## [199] "HTP0419A"  "HTP0420A"  "HTP0421A"  "HTP0422A"  "HTP0423A"  "HTP0424A" 
## [205] "HTP0425A"  "HTP0426A"  "HTP0427A"  "HTP0428A"  "HTP0429B"  "HTP0430A" 
## [211] "HTP0431A2" "HTP0432A"  "HTP0433A"  "HTP0434A"  "HTP0435A2" "HTP0436A" 
## [217] "HTP0437A"  "HTP0438A"  "HTP0439A2" "HTP0440A"  "HTP0442A2" "HTP0443A2"
## [223] "HTP0444B2" "HTP0445A"  "HTP0446A"  "HTP0447B2" "HTP0448A"  "HTP0449A" 
## [229] "HTP0450A"  "HTP0451A"  "HTP0452A"  "HTP0454A"  "HTP0455B"  "HTP0456A" 
## [235] "HTP0457B"  "HTP0458A"  "HTP0459A"  "HTP0460A"  "HTP0461A"  "HTP0462A" 
## [241] "HTP0463A"  "HTP0464A"  "HTP0465A"  "HTP0466B"  "HTP0467A"  "HTP0468A2"
## [247] "HTP0469A"  "HTP0470A"  "HTP0471A"  "HTP0472A"  "HTP0473A"  "HTP0474A" 
## [253] "HTP0475A"  "HTP0476A"  "HTP0477A"  "HTP0478A"  "HTP0479A"  "HTP0480A" 
## [259] "HTP0482A"  "HTP0484A"  "HTP0485A"  "HTP0487A"  "HTP0488A"  "HTP0491A" 
## [265] "HTP0492B"  "HTP0493B"  "HTP0494A"  "HTP0495B"  "HTP0497B"  "HTP0498B" 
## [271] "HTP0501A"  "HTP0502B"  "HTP0506B"  "HTP0507A"  "HTP0508B2" "HTP0510A" 
## [277] "HTP0512A"  "HTP0514A"  "HTP0515A"  "HTP0516A"  "HTP0517A"  "HTP0519A" 
## [283] "HTP0520B"  "HTP0523B"  "HTP0524B"  "HTP0525A"  "HTP0527B"  "HTP0530A" 
## [289] "HTP0531A"  "HTP0534A"  "HTP0535A"  "HTP0537A"  "HTP0538A"  "HTP0539A" 
## [295] "HTP0540A"  "HTP0541A"  "HTP0542A"  "HTP0543A"  "HTP0544A"  "HTP0545A" 
## [301] "HTP0546A"  "HTP0547A"  "HTP0548A"  "HTP0549A"  "HTP0550A"  "HTP0551B" 
## [307] "HTP0552A"  "HTP0553A"  "HTP0554A"  "HTP0555B"  "HTP0556A"  "HTP0557B" 
## [313] "HTP0559B"  "HTP0560A"  "HTP0561A"  "HTP0563A"  "HTP0564A"  "HTP0565A" 
## [319] "HTP0566A"  "HTP0567B"  "HTP0568A"  "HTP0569A"  "HTP0570A"  "HTP0571A" 
## [325] "HTP0572A"  "HTP0573B"  "HTP0574A"  "HTP0575A"  "HTP0576B"  "HTP0577A" 
## [331] "HTP0578A"  "HTP0580A"  "HTP0581A"  "HTP0582B"  "HTP0583A"  "HTP0584A" 
## [337] "HTP0585A"  "HTP0586A"  "HTP0587A"  "HTP0588A"  "HTP0589A"  "HTP0590A" 
## [343] "HTP0591A"  "HTP0592A"  "HTP0593A"  "HTP0594A"  "HTP0595B"  "HTP0596A" 
## [349] "HTP0597A"  "HTP0598A"  "HTP0599A"  "HTP0600A"  "HTP0602A"  "HTP0603A" 
## [355] "HTP0604A"  "HTP0605A"  "HTP0607A"  "HTP0608A"  "HTP0609A"  "HTP0610A" 
## [361] "HTP0611A"  "HTP0612A"  "HTP0613A"  "HTP0615B"  "HTP0616A"  "HTP0617A" 
## [367] "HTP0618A"  "HTP0619A"  "HTP0620A"  "HTP0621A"  "HTP0622A"  "HTP0623A" 
## [373] "HTP0624A"  "HTP0625A"  "HTP0626A"  "HTP0627A"  "HTP0628B"  "HTP0629A" 
## [379] "HTP0630A"  "HTP0631A"  "HTP0632A"  "HTP0633A"  "HTP0634A"  "HTP0637A" 
## [385] "HTP0638A"  "HTP0639A"  "HTP0641A"  "HTP0642A"  "HTP0643A"  "HTP0644A" 
## [391] "HTP0645A"  "HTP0646A"  "HTP0647A"  "HTP0648B"  "HTP0649B"  "HTP0650A" 
## [397] "HTP0651A"  "HTP0652A"  "HTP0653B"  "HTP0654A"  "HTP0655B"  "HTP0657A" 
## [403] "HTP0658A"  "HTP0659A"  "HTP0660A"  "HTP0661A"  "HTP0662A"  "HTP0663A" 
## [409] "HTP0664A"  "HTP0665A"  "HTP0666A"  "HTP0667A"  "HTP0668A"  "HTP0669B" 
## [415] "HTP0672A"  "HTP0676A"  "HTP0677B"  "HTP0706A"  "HTP0708A"
# List the column names of the abundance table (one column per metabolite).
names(mets)
##   [1] "X.15Z..Tetracosenoic.acid"                  
##   [2] "X.5.L.Glutamyl..L.glutamine"                
##   [3] "X1.4.beta.D.Xylan"                          
##   [4] "X10.S.17.S..DiHDHA.protectin.D1"            
##   [5] "X11.HETE"                                   
##   [6] "X12.S..HETE"                                
##   [7] "X15.S..HETE"                                
##   [8] "X2.Oxoglutaramate"                          
##   [9] "X2.Oxoglutarate"                            
##  [10] "X3..5..Cyclic.IMP"                          
##  [11] "X3.Methyleneoxindole"                       
##  [12] "X4.Acetamidobutanoate"                      
##  [13] "X4.Pyridoxate"                              
##  [14] "X5..Phosphoribosyl.N.formylglycinamide"     
##  [15] "X5.6.Dihydrothymine"                        
##  [16] "X5.Guanidino.2.oxopentanoate"               
##  [17] "X5.Hydroxyindoleacetate"                    
##  [18] "X5.Hydroxyisourate"                         
##  [19] "X5.Oxoproline"                              
##  [20] "X5.S..HETE"                                 
##  [21] "acetyl.carnitine"                           
##  [22] "Acetylcholine"                              
##  [23] "acyl.C12..O.dodecanoyl.carnitine."          
##  [24] "acyl.C12.1..O.dodecenoyl.carnitine."        
##  [25] "acyl.C14..O.tetradecanoyl.L.carnitine."     
##  [26] "acyl.C14.1..Tetradecenoyl.Carnitine."       
##  [27] "acyl.C16..L.Palmitoylcarnitine."            
##  [28] "acyl.C16.1..Hexadecenoyl.carnitine."        
##  [29] "acyl.C18..Octadecanoyl.L.carnitine."        
##  [30] "acyl.C18.1..O.octadecenoyl.L.carnitine."    
##  [31] "acyl.C18.2"                                 
##  [32] "acyl.C4.DC"                                 
##  [33] "acyl.C5"                                    
##  [34] "acyl.C5.OH"                                 
##  [35] "acyl.C5.1"                                  
##  [36] "Adenine"                                    
##  [37] "Adenosine"                                  
##  [38] "Alanine"                                    
##  [39] "Allantoate"                                 
##  [40] "AMP"                                        
##  [41] "Anthranilate"                               
##  [42] "Arginine"                                   
##  [43] "Ascorbate"                                  
##  [44] "Asparagine"                                 
##  [45] "Aspartate"                                  
##  [46] "butanoyl.l.carnitine"                       
##  [47] "Chenodeoxycholic.acid"                      
##  [48] "Cholic.acid"                                
##  [49] "Choline"                                    
##  [50] "Citrate"                                    
##  [51] "Creatine"                                   
##  [52] "Creatinine"                                 
##  [53] "Cys.Gly"                                    
##  [54] "Cysteine"                                   
##  [55] "Cystine"                                    
##  [56] "D.Arabitol"                                 
##  [57] "D.Glucono.1.5.lactone.6.phosphate"          
##  [58] "D.Glucose"                                  
##  [59] "D.Rhamnose"                                 
##  [60] "D.Ribose"                                   
##  [61] "Decanoic.acid..caprate."                    
##  [62] "Dehydroascorbate"                           
##  [63] "Deoxycholic.acid"                           
##  [64] "DiHOME"                                     
##  [65] "Dihomo.g.Linolenic.acid.eicosatrienoic.acid"
##  [66] "Dimethylglycine"                            
##  [67] "Diphosphate"                                
##  [68] "Docosahexaenoic.acid"                       
##  [69] "Docosapentaenoic.acid"                      
##  [70] "Dodecanedioic.acid"                         
##  [71] "Dodecanoic.acid"                            
##  [72] "Dopamine"                                   
##  [73] "Ectoine"                                    
##  [74] "EpOME.HODE"                                 
##  [75] "Fumarate"                                   
##  [76] "gamma.Glutamyl.Se.methylselenocysteine"     
##  [77] "gamma.L.Glutamyl.D.alanine"                 
##  [78] "Glutamate"                                  
##  [79] "Glutamine"                                  
##  [80] "Glycerol.3.phosphate"                       
##  [81] "Glycine"                                    
##  [82] "Glycochenodeoxycholic.acid"                 
##  [83] "Glycocholic.acid"                           
##  [84] "Guanidinoacetate"                           
##  [85] "Guanine"                                    
##  [86] "HEPE.oxoETE"                                
##  [87] "Heptanoic.acid"                             
##  [88] "Hexadecanoic.acid"                          
##  [89] "Hexadecenoic.acid"                          
##  [90] "Hexanoic.acid..caproate."                   
##  [91] "hexanoyl.L.carnitine"                       
##  [92] "Hexose.phosphate"                           
##  [93] "Histidine"                                  
##  [94] "HOTrE.oxoODE"                               
##  [95] "Hypoxanthine"                               
##  [96] "Icosapentaenoic.acid"                       
##  [97] "Icosatetraenoic.acid"                       
##  [98] "IDP"                                        
##  [99] "Indole"                                     
## [100] "Indole.3.acetaldehyde"                      
## [101] "Indole.3.acetate"                           
## [102] "kynurenine"                                 
## [103] "L.Adrenaline"                               
## [104] "L.Arabinose"                                
## [105] "L.Carnitine"                                
## [106] "L.Citrulline"                               
## [107] "L.Methionine.S.oxide"                       
## [108] "L.octanoylcarnitine"                        
## [109] "Lactate"                                    
## [110] "Leucine.isoleucine"                         
## [111] "Leukotriene.B4.PGA1.PGB1"                   
## [112] "Linoleate"                                  
## [113] "LPA_16.0"                                   
## [114] "LPA_16.1"                                   
## [115] "LPA_18.2"                                   
## [116] "LPA_20.4"                                   
## [117] "Lysine"                                     
## [118] "Lysophosphatidic.Acid"                      
## [119] "Lysophosphatidylinositol"                   
## [120] "Malate"                                     
## [121] "Maltose"                                    
## [122] "Mannitol"                                   
## [123] "Methionine"                                 
## [124] "N.Acetylneuraminate"                        
## [125] "N.Acetylornithine"                          
## [126] "N.formyl.kynurenine"                        
## [127] "Nonanoic.acid..pelargonate."                
## [128] "O.Decanoyl.L.carnitine"                     
## [129] "O.Decenoyl.L.carnitine"                     
## [130] "Octadecanoic.acid"                          
## [131] "Octadecatrienoic.acid"                      
## [132] "Octadecenoic.acid..Oleic.acid."             
## [133] "Octanoic.acid..caprylate."                  
## [134] "octenoyl.l.carnitine"                       
## [135] "Ornithine"                                  
## [136] "Oxaloacetate"                               
## [137] "Pantothenate"                               
## [138] "Phenylalanine"                              
## [139] "Phosphate"                                  
## [140] "Picolinic.acid"                             
## [141] "Proline"                                    
## [142] "propionyl.carnitine"                        
## [143] "Prostaglandin.A2.B2.J2"                     
## [144] "Prostaglandin.A3.B3"                        
## [145] "Pyridoxal"                                  
## [146] "Pyridoxamine.5..phosphate"                  
## [147] "Pyruvate"                                   
## [148] "quinolinic.acid"                            
## [149] "Resolvin.D1.D2"                             
## [150] "S.Adenosyl.L.homocysteine"                  
## [151] "Serine"                                     
## [152] "Sphinganine.1.phosphate"                    
## [153] "Sphingosine"                                
## [154] "Sphingosine.1.phosphate"                    
## [155] "Stearidonic.acid"                           
## [156] "Succinate"                                  
## [157] "Taurine"                                    
## [158] "Taurochenodeoxycholic.acid"                 
## [159] "Taurocholic.acid"                           
## [160] "Taurodeoxycholic.acid"                      
## [161] "Taurolithocholic.acid"                      
## [162] "Tauroursodeoxycholic.acid"                  
## [163] "Tetradecanoic.acid"                         
## [164] "Threonine"                                  
## [165] "Thymidine"                                  
## [166] "trans.4.Hydroxy.L.proline"                  
## [167] "Tryptophan"                                 
## [168] "Tyrosine"                                   
## [169] "UDP"                                        
## [170] "UMP"                                        
## [171] "Urate"                                      
## [172] "Ursodeoxycholic.acid"                       
## [173] "Valine"                                     
## [174] "Xanthine"
# Read in the metadata. The CSV text is downloaded first and then parsed; the
# first CSV column supplies the row names.
x <- getURL("https://raw.githubusercontent.com/DS3-2024/Visualize_Cluster_HTP/main/data/P4C_metadata_011722.csv")
# Spell out TRUE: the shorthand T is an ordinary variable that can be reassigned.
info <- read.csv(text = x, row.names = 1, header = TRUE)
dim(info)
## [1] 587  21
colnames(info)
##  [1] "RecordID"                       "Sex"                           
##  [3] "Karyotype"                      "Event_name"                    
##  [5] "LabID"                          "Age_at_visit"                  
##  [7] "BMI"                            "Sample_source"                 
##  [9] "Anxiety"                        "Any.autoimmune.skin.condition" 
## [11] "Any.congenital.heart.defect"    "Any.hearing.loss.condition"    
## [13] "Any.hypothyroidism"             "Any.seizure.history"           
## [15] "Any.sleep.apnea"                "Asthma.reactive.airway.disease"
## [17] "Celiac.disease"                 "Depression"                    
## [19] "Frequent.Recurrent.pneumonia"   "Obesity"                       
## [21] "Recurrent.otitis.media"
# Make sure the patient order matches in both the metadata and the
# metabolomics data: keep only the samples present in both tables, in the
# same order.
patients <- intersect(row.names(mets), row.names(info))
length(patients)
## [1] 419
mets <- mets[patients, ]
info <- info[patients, ]

# Sort both tables by karyotype. The ordering is computed once so that exactly
# the same permutation is applied to each table.
karyotype_order <- order(info$Karyotype)
mets <- mets[karyotype_order, ]
info <- info[karyotype_order, ]

# Remove the batch effect of sample source (where the samples were collected).
# The correction is applied to log2 values with samples in columns, then the
# result is transposed back and returned to the original scale.
mets <- 2^t(removeBatchEffect(t(log2(mets)), batch = info$Sample_source))


# seq_len() is safe for an empty table, where seq(1, 0) would yield c(1, 0).
info$order <- seq_len(nrow(info))
info$Karyotype <- as.factor(info$Karyotype)
# Combined table: log2 abundances alongside the sample metadata.
mets.info <- cbind(log2(mets), info)

Principal Component Analysis

We will use PCA to plot the data and explore sample information. The example used here is taken from StatQuest; please refer to the StatQuest PCA video.

We will also use this image to illustrate how the regression line is fit (image: regression fit).

# 1D: strip plot of a single metabolite, one row of points per karyotype.
# Base graphics fixes the axes on the first plot() call, so the x limits are
# taken from all samples; limits based on the T21 group alone would clip any
# Control value lying outside the T21 range.
x_limits <- range(mets.info$Prostaglandin.A2.B2.J2, finite = TRUE)
ids <- grep("T21", mets.info$Karyotype)
plot(mets.info$Prostaglandin.A2.B2.J2[ids], rep(0, length(ids)),
     pch = 20, ylab = "", xlab = "Prostaglandin.A2.B2.J2 expression",
     cex = 1, col = "lightseagreen", xlim = x_limits)
ids <- grep("Control", mets.info$Karyotype)
# Control samples are drawn slightly above the T21 row (y = 0.05 instead of 0).
points(mets.info$Prostaglandin.A2.B2.J2[ids], rep(0.05, length(ids)),
       pch = 20, col = "salmon", cex = 1)
legend(10, 1, fill = c("lightseagreen", "salmon"), legend = c("T21", "Control"))

# The same metabolite as a box plot per karyotype, with the samples overlaid.
ggplot(mets.info, aes(x = Karyotype, y = Prostaglandin.A2.B2.J2, fill = Karyotype)) +
  geom_boxplot() +
  geom_jitter() +
  theme_bw()

# 2D: two metabolites plotted against each other, coloured by karyotype.
ggplot(mets.info, aes(x = Hypoxanthine, y = Prostaglandin.A2.B2.J2, color = Karyotype)) +
  geom_point() +
  theme_bw()

# A simplified example: two metabolites measured in six samples.
Met1 <- c(10, 11, 8, 3, 1, 3)
Met2 <- c(5, 4, 5, 3, 3, 1)
plot(Met1, Met2, pch =19)

# Mark the mean of Met1 and Met2 (the centre of the data) in purple.
plot(Met1, Met2, pch =19)
points(x = mean(Met1), y = mean(Met2), col = "purple", pch = 19, cex = 3)

# Centre the data: subtract each variable's mean so the centre moves to the
# origin, then draw dashed grey lines for both axes through (0, 0).
Met1 <- Met1 - mean(Met1)
Met2 <- Met2 - mean(Met2)
plot(Met1, Met2, pch =19)
segments(x0 = c(0, -10), y0 = c(-10, 0), x1 = c(0, 20), y1 = c(20, 0),
         col = "grey", lty = 2)

# Add a regression line, plus a red right-angle marker with a horizontal leg
# from (0, 0) to (4, 0) and a vertical leg from (4, 0) to (4, 1).
plot(Met1, Met2, pch =19)
segments(x0 = c(0, -10), y0 = c(-10, 0), x1 = c(0, 20), y1 = c(20, 0),
         col = "grey", lty = 2)
abline(lm(Met2 ~ Met1))
segments(x0 = c(0, 4), y0 = c(0, 1), x1 = c(4, 4), y1 = c(0, 0),
         col = "red", lwd = 2)

# back to the HTP data
# find the mean of the X and Y directions
# The mean is a single point, so it is added with annotate() rather than as a
# geom_point() layer, which would inherit every row of mets.info and draw the
# same point once per sample.
ggplot(mets.info, aes(x = Hypoxanthine, y = Prostaglandin.A2.B2.J2)) +
  geom_point() +
  theme_bw() +
  annotate("point",
           x = mean(mets.info$Hypoxanthine),
           y = mean(mets.info$Prostaglandin.A2.B2.J2),
           colour = "purple", size = 5)

# center the data and plot
# Subtract each metabolite's mean so the centre of the data sits at the origin.
mets.info$Hypoxanthine.standard <- mets.info$Hypoxanthine - mean(mets.info$Hypoxanthine)
mets.info$Prostaglandin.A2.B2.J2.standard <- mets.info$Prostaglandin.A2.B2.J2 - mean(mets.info$Prostaglandin.A2.B2.J2)
ggplot(mets.info, aes(x = Hypoxanthine.standard, y = Prostaglandin.A2.B2.J2.standard)) +
  geom_point() +
  theme_bw()

# add a regression line
# The origin marker is a single point, so it is added with annotate() instead
# of a geom_point() layer that would inherit every row of mets.info.
ggplot(mets.info, aes(x = Hypoxanthine.standard, y = Prostaglandin.A2.B2.J2.standard)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm", se = FALSE) +
  annotate("point", x = 0, y = 0, colour = "red")

# note in the prcomp implementation of PCA, 
# x = PCs
# rotation = loadings
# sdev^2 = eigenvalues

#Iris data
# prcomp()'s scaling argument is named `scale.`; it is spelled out in full
# here, and TRUE is used because the shorthand T can be reassigned.
pca <- prcomp(na.omit(iris.mat), scale. = TRUE)
autoplot(pca, data = iris, colour = "Species")

pca$rotation #display the loadings (named rotation in the prcomp data structure)
##                     PC1         PC2        PC3        PC4
## Sepal.Length  0.5220620 -0.37334080  0.7203991  0.2628555
## Sepal.Width  -0.2676921 -0.92485914 -0.2402154 -0.1235842
## Petal.Length  0.5803119 -0.02431115 -0.1405854 -0.8017998
## Petal.Width   0.5648277 -0.06827292 -0.6352617  0.5222557
get_eigenvalue(pca)
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.91964087       72.9910218                    72.99102
## Dim.2 0.91432690       22.8581724                    95.84919
## Dim.3 0.14524258        3.6310646                    99.48026
## Dim.4 0.02078965        0.5197411                   100.00000
fviz_pca_var(pca,
             col.var = "contrib", # Color by contributions to the PC
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE     # Avoid text overlapping
             )

#HTP data
# PCA on the metabolite table with each variable scaled; `scale.` is prcomp()'s
# full argument name and TRUE cannot be reassigned the way T can.
pca <- prcomp(na.omit(mets), scale. = TRUE)

# Plot the samples on the first two PCs, coloured by karyotype.
autoplot(pca, data = info, colour = "Karyotype")

# The same projection, coloured by sample source.
autoplot(pca, data = info, colour = "Sample_source")

# remove outlier samples
# First inspect the distribution of the PC1 scores ...
hist(x = pca$x[, 1], main = "PC1")

# ... then list them in increasing order, named by sample.
sort(pca$x[, "PC1"], decreasing = FALSE)
##    HTP0378A    HTP0561A    HTP0549A    HTP0467A    HTP0388A    HTP0336A 
## -9.45231329 -9.43089445 -8.95691066 -8.56379238 -8.26244962 -8.12758694 
##    HTP0556A   HTP0035B3    HTP0357A   HTP0243A3    HTP0545A    HTP0331A 
## -8.07235771 -7.78774025 -7.39167744 -7.33834927 -7.05604314 -7.01753433 
##    HTP0573B   HTP0087B2   HTP0116A2   HTP0249A2   HTP0226A3    HTP0530A 
## -6.96902574 -6.92198497 -6.84419532 -6.71153550 -6.65555618 -6.38150174 
##    HTP0457B   HTP0026B2    HTP0464A    HTP0315A    HTP0577A    HTP0547A 
## -6.29000484 -6.24484713 -6.18076934 -6.17022779 -6.08716521 -5.97439384 
##    HTP0295A    HTP0296A    HTP0609A   HTP0211A3   HTP0219A2    HTP0300A 
## -5.92240655 -5.90097381 -5.80035071 -5.46584207 -5.39695345 -5.35184620 
##   HTP0015A4   HTP0105B3    HTP0659A    HTP0598A   HTP0251A2    HTP0572A 
## -5.14371773 -5.13666464 -5.12693853 -5.11191334 -5.00495986 -4.98500726 
##    HTP0563A    HTP0649B   HTP0335A2    HTP0434A   HTP0277A3    HTP0550A 
## -4.88441199 -4.86726583 -4.77222842 -4.73983468 -4.73053209 -4.67116259 
##   HTP0050A3    HTP0663A    HTP0569A    HTP0668A   HTP0052A2   HTP0040B3 
## -4.66496085 -4.65680951 -4.65290610 -4.63500448 -4.63331644 -4.62356407 
##    HTP0404A    HTP0622A    HTP0123A   HTP0399A2    HTP0484A    HTP0616A 
## -4.61688961 -4.60125127 -4.60019628 -4.56419010 -4.48058543 -4.47802469 
##    HTP0512A   HTP0439A2    HTP0409A   HTP0044B2    HTP0394A    HTP0553A 
## -4.44592032 -4.34502161 -4.29783706 -4.29235373 -4.26612327 -4.14024148 
##    HTP0605A    HTP0669B    HTP0306A    HTP0325A   HTP0207A4    HTP0589A 
## -4.13111294 -4.13089470 -4.10278883 -4.08273918 -4.01286557 -3.99321139 
##   HTP0073A2   HTP0022B2    HTP0147B   HTP0054A2    HTP0537A    HTP0654A 
## -3.89744022 -3.82304576 -3.77290899 -3.68454990 -3.64699215 -3.61249581 
##    HTP0351A    HTP0506B    HTP0619A    HTP0585A    HTP0566A    HTP0413A 
## -3.60398334 -3.60195316 -3.57545873 -3.57522995 -3.54878317 -3.53861756 
##   HTP0442A2    HTP0414A    HTP0440A   HTP0329A2    HTP0396A   HTP0107A3 
## -3.53135576 -3.49583482 -3.48496675 -3.42882274 -3.41924028 -3.41594221 
##    HTP0307B   HTP0059B2    HTP0392A    HTP0151A    HTP0407A    HTP0610A 
## -3.40092351 -3.38059301 -3.34712147 -3.31298862 -3.27853373 -3.24286956 
##    HTP0571A   HTP0078B2   HTP0070A3    HTP0321A   HTP0018B3   HTP0084A2 
## -3.20784983 -3.19465773 -3.13236649 -3.11923195 -3.11503758 -2.92045509 
##    HTP0202B    HTP0401A    HTP0523B    HTP0379A   HTP0312A2    HTP0428A 
## -2.89930509 -2.86857682 -2.85537356 -2.81930417 -2.81135628 -2.76801536 
##    HTP0445A    HTP0567B    HTP0386A    HTP0429B    HTP0348A    HTP0586A 
## -2.75857726 -2.74765567 -2.72260896 -2.69902398 -2.65616351 -2.62773769 
##    HTP0664A   HTP0001B2    HTP0482A    HTP0389A   HTP0241A2    HTP0517A 
## -2.59510420 -2.56225653 -2.55379981 -2.53560720 -2.52256135 -2.51854259 
##   HTP0324A2    HTP0303A    HTP0557B    HTP0639A    HTP0582B    HTP0495B 
## -2.51570564 -2.51049193 -2.47909933 -2.47465294 -2.46101064 -2.45721588 
##   HTP0263A2    HTP0665A    HTP0564A    HTP0418A    HTP0469A    HTP0119A 
## -2.44098237 -2.43774740 -2.43534023 -2.29983569 -2.28979908 -2.28886769 
##   HTP0017A4   HTP0109A3   HTP0148B2    HTP0559B    HTP0347B   HTP0254A2 
## -2.24805294 -2.23964576 -2.22130761 -2.21494693 -2.18597054 -2.18224787 
##   HTP0259A2   HTP0036A2    HTP0641A    HTP0570A    HTP0592A    HTP0661A 
## -2.17892269 -2.14456379 -2.12721997 -2.11974981 -2.11045510 -2.08309722 
##    HTP0552A    HTP0652A    HTP0479A   HTP0334A2    HTP0584A    HTP0408A 
## -2.04816718 -2.00323676 -1.98844968 -1.96640957 -1.95124012 -1.91148223 
##    HTP0516A    HTP0326A   HTP0242B3    HTP0548A   HTP0034B3    HTP0376B 
## -1.90683102 -1.89295717 -1.84673786 -1.81820028 -1.75851698 -1.75809092 
##    HTP0455B    HTP0531A    HTP0627A    HTP0433A    HTP0419A   HTP0236A2 
## -1.75775028 -1.74352619 -1.72530504 -1.68454341 -1.64110839 -1.59125919 
##    HTP0574A    HTP0554A    HTP0460A    HTP0651A   HTP0043B2    HTP0337A 
## -1.58070319 -1.55650559 -1.55546543 -1.55146462 -1.52420495 -1.46372157 
##   HTP0103A2   HTP0019B2    HTP0323A    HTP0332A    HTP0576B    HTP0343B 
## -1.43065053 -1.40923277 -1.40452849 -1.39946060 -1.34291138 -1.32589629 
##    HTP0205B    HTP0294B   HTP0101A2    HTP0588A    HTP0637A    HTP0676A 
## -1.28851159 -1.28525815 -1.27623333 -1.25177347 -1.25050983 -1.23438158 
##    HTP0477A   HTP0112A3    HTP0370A    HTP0422A    HTP0316B    HTP0621A 
## -1.20621742 -1.11991513 -1.10908515 -1.10445041 -1.10387767 -1.09794574 
##    HTP0427A    HTP0594A    HTP0288B    HTP0345A    HTP0480A    HTP0487A 
## -1.08719763 -1.04825989 -1.01761080 -0.99943763 -0.99526275 -0.99454138 
##    HTP0406A    HTP0476A   HTP0023A2    HTP0540A    HTP0638A   HTP0096B2 
## -0.93446088 -0.92468697 -0.88620511 -0.86209968 -0.84268004 -0.83343120 
##    HTP0149B    HTP0612A   HTP0214B2    HTP0341A    HTP0400A   HTP0100B2 
## -0.82713027 -0.79980099 -0.79605706 -0.79439494 -0.76675535 -0.75407006 
##    HTP0330A    HTP0432A    HTP0425A    HTP0575A    HTP0613A    HTP0602A 
## -0.75316030 -0.74039554 -0.73853296 -0.73564629 -0.68612973 -0.68103913 
##   HTP0055B2    HTP0534A    HTP0471A    HTP0340A    HTP0118B    HTP0525A 
## -0.64834747 -0.62988362 -0.60192790 -0.59757722 -0.58569356 -0.57503685 
##    HTP0527B   HTP0051B2    HTP0403A    HTP0624A   HTP0067B2    HTP0485A 
## -0.54818056 -0.52754352 -0.52098902 -0.48999323 -0.47434952 -0.47267320 
##    HTP0591A    HTP0643A    HTP0398A    HTP0430A    HTP0538A   HTP0053A2 
## -0.46254279 -0.38316612 -0.37868498 -0.34347475 -0.33041914 -0.32410168 
##   HTP0230A2    HTP0402A    HTP0607A    HTP0587A    HTP0374A    HTP0544A 
## -0.31766948 -0.31439263 -0.25982378 -0.25285375 -0.18652073 -0.09765263 
##    HTP0339A   HTP0111B2    HTP0390A    HTP0653B   HTP0058B2   HTP0435A2 
## -0.09126631 -0.07945610 -0.05792800 -0.05384159 -0.02664515 -0.01577486 
##    HTP0314B    HTP0436A    HTP0328A    HTP0420A    HTP0535A    HTP0298A 
##  0.01995694  0.03087348  0.04166979  0.06308144  0.06821869  0.08999450 
##    HTP0642A    HTP0437A    HTP0603A    HTP0565A    HTP0618A    HTP0644A 
##  0.10363101  0.13011771  0.15292021  0.20018315  0.23001954  0.23691402 
##   HTP0127B2    HTP0206B   HTP0301A3   HTP0508B2   HTP0238A3    HTP0578A 
##  0.29495125  0.29739942  0.30184844  0.30980492  0.31258726  0.34637577 
##    HTP0491A   HTP0104B2    HTP0451A    HTP0630A    HTP0327A    HTP0393A 
##  0.34854551  0.36022592  0.36901618  0.38122240  0.38955712  0.40365139 
##    HTP0475A    HTP0658A    HTP0391A    HTP0125B    HTP0519A   HTP0085B2 
##  0.44292110  0.45003876  0.50101866  0.51929552  0.54223412  0.54242697 
##    HTP0410A   HTP0361B2    HTP0423A    HTP0593A    HTP0667A    HTP0660A 
##  0.54845247  0.56324174  0.63058255  0.65535612  0.66815425  0.67821123 
##    HTP0450A    HTP0384A    HTP0411A    HTP0604A    HTP0364A    HTP0287B 
##  0.70376361  0.72894429  0.73263754  0.79628683  0.80861881  0.81599587 
##   HTP0092B2    HTP0666A    HTP0657A    HTP0349B    HTP0465A    HTP0520B 
##  0.84853834  0.86948449  0.88298664  0.88913478  0.89273872  0.92563891 
##   HTP0098B2    HTP0611A    HTP0650A   HTP0061B2    HTP0352B    HTP0623A 
##  0.96769924  0.97109566  1.02064631  1.03967667  1.04581576  1.05251147 
##    HTP0608A    HTP0662A    HTP0354A    HTP0461A    HTP0625A    HTP0319A 
##  1.09373138  1.11228125  1.20499925  1.20870270  1.22158558  1.28346437 
##   HTP0048B2   HTP0025A3    HTP0590A   HTP0126B2    HTP0342A   HTP0066A2 
##  1.32551879  1.33396822  1.38646417  1.42932646  1.43742009  1.49174987 
##    HTP0456A   HTP0080A2   HTP0095B2   HTP0213A3    HTP0424A   HTP0005A3 
##  1.54530395  1.60236137  1.60592690  1.62753641  1.62761640  1.69679816 
##    HTP0492B    HTP0405A   HTP0333A2   HTP0468A2   HTP0090B2    HTP0387A 
##  1.75043068  1.75449463  1.78042760  1.78707064  1.81711321  1.88350102 
##    HTP0382A    HTP0497B    HTP0426A    HTP0446A   HTP0079B2    HTP0462A 
##  1.91405480  1.92559660  1.92920669  1.95512458  1.95840767  1.98138185 
##    HTP0708A    HTP0581A   HTP0057B2   HTP0447B2    HTP0510A    HTP0472A 
##  2.00688001  2.02370482  2.06626497  2.13682062  2.16443887  2.17850360 
##    HTP0473A   HTP0320A2    HTP0452A   HTP0431A2    HTP0515A    HTP0454A 
##  2.21598587  2.24573600  2.30176253  2.30623748  2.36911121  2.37831310 
##    HTP0599A    HTP0289B    HTP0595B    HTP0617A    HTP0346B    HTP0466B 
##  2.41236974  2.42579438  2.43494407  2.49180197  2.54140900  2.62088226 
##    HTP0632A    HTP0560A    HTP0421A   HTP0047B2    HTP0634A    HTP0381A 
##  2.64769510  2.76147873  2.95348092  2.95546895  3.07004956  3.08093601 
##    HTP0380A    HTP0543A    HTP0463A    HTP0501A    HTP0459A   HTP0012A2 
##  3.10317496  3.11083133  3.13132675  3.15691751  3.17798667  3.21152691 
##    HTP0204A    HTP0458A    HTP0488A    HTP0438A    HTP0474A    HTP0502B 
##  3.22487639  3.24405016  3.26430011  3.31658692  3.46723232  3.52376816 
##    HTP0494A   HTP0443A2    HTP0706A    HTP0672A    HTP0583A   HTP0216B2 
##  3.56561560  3.78024966  3.91704112  3.92182984  3.93954234  3.95246190 
##   HTP0062B2    HTP0647A    HTP0539A    HTP0478A    HTP0397A    HTP0412A 
##  3.98192042  4.04785526  4.06729105  4.16899341  4.19171501  4.24802155 
##    HTP0395A    HTP0514A    HTP0580A    HTP0546A    HTP0385A    HTP0542A 
##  4.30520520  4.32180884  4.40498022  4.42076596  4.44241101  4.46680236 
##    HTP0448A   HTP0262A2    HTP0646A    HTP0367A   HTP0444B2    HTP0633A 
##  4.51574297  4.54104473  4.64802430  4.70543086  4.82461870  4.88651189 
##    HTP0555B    HTP0208B    HTP0355B    HTP0375A    HTP0368A    HTP0626A 
##  4.89717903  4.95544348  4.96342155  5.01319081  5.07597029  5.13792464 
##   HTP0261A2    HTP0290B    HTP0631A    HTP0629A    HTP0645A    HTP0551B 
##  5.23871385  5.34392058  5.57489848  5.62549060  5.72452691  5.84094282 
##    HTP0600A    HTP0470A    HTP0449A   HTP0318A3    HTP0677B    HTP0655B 
##  5.90156552  6.06904551  6.26773986  6.42324259  6.55812703  7.01236448 
##    HTP0597A   HTP0077B2    HTP0524B    HTP0373A    HTP0615B    HTP0620A 
##  7.03108216  7.25306884  7.30972716  7.54452671  7.77829543  8.91447993 
##    HTP0648B   HTP0042B2    HTP0322B    HTP0415A    HTP0507A    HTP0628B 
##  9.03609172  9.15597370  9.20316503  9.96060302 10.22714064 10.57801003 
##   HTP0198A3    HTP0293A    HTP0310A    HTP0146B    HTP0493B    HTP0338B 
## 11.06122111 11.09098080 11.29508705 11.40109323 12.53692307 12.79348372 
##    HTP0498B   HTP0247A2    HTP0596A    HTP0568A    HTP0541A 
## 13.35725235 15.36261472 19.19796064 34.19194982 42.56208877
# Samples with a PC1 score above 15 are treated as outliers. Samples are
# selected by name rather than by a positional logical mask, so the selection
# stays aligned with mets and info even if na.omit() dropped rows before the
# PCA was computed.
pc1 <- pca$x[, 1]
names(pc1)[pc1 > 15]
## [1] "HTP0247A2" "HTP0541A"  "HTP0568A"  "HTP0596A"
keep_ids <- names(pc1)[pc1 < 15]
mets <- mets[keep_ids, ]
info <- info[keep_ids, ]
# NOTE(review): mets.info was built earlier from log2(mets); here it holds the
# untransformed values -- confirm this is intended.
mets.info <- cbind(mets, info)
dim(mets)
## [1] 415 174
# Recompute the PCA without the outliers (`scale.` is prcomp()'s full argument
# name; TRUE cannot be reassigned the way T can).
pca <- prcomp(na.omit(mets), scale. = TRUE)
autoplot(pca, data = info, colour = "Karyotype")

autoplot(pca, data = info, colour = "Sample_source")

autoplot(pca, data = info, colour = "Age_at_visit")

 # scree plot
# Percentage of the total variance explained by each principal component
# (sdev^2 are the eigenvalues). Stored as a data.frame because ggplot() takes
# a data frame; a bare matrix is only accepted by recent ggplot2 releases.
var_explained <- data.frame(
  PC = seq_along(pca$sdev),
  var = 100 * (pca$sdev^2 / sum(pca$sdev^2))
)
# Only the first 20 components are plotted; subsetting up front means no rows
# fall outside the x limits and get dropped with a warning.
ggplot(var_explained[var_explained$PC <= 20, ], aes(x = PC, y = var)) +
  geom_line() +
  xlab("Principal Component") +
  theme_bw() +
  ylab("Variance Explained (%)") +
  ggtitle("Scree Plot") +
  xlim(1, 20)

t-distributed stochastic neighbor embedding

We will use tSNE to plot the data and explore sample information

# tSNE is stochastic, so it produces different results depending on the random
# seed. To get the same results on every run, fix the seed.
set.seed(48673)

# theta is a parameter that balances speed and accuracy; theta = 0 is the exact tSNE calculation
# perplexity is the value that balances density of the cluster size

#Iris data
# TRUE/FALSE are spelled out because the shorthands T and F can be reassigned.
tsne <- Rtsne(iris.mat, pca = FALSE, verbose = TRUE, perplexity = 30, theta = 0)
## Read the 149 x 4 data matrix successfully!
## Using no_dims = 2, perplexity = 30.000000, and theta = 0.000000
## Computing input similarities...
## Symmetrizing...
## Done in 0.00 seconds!
## Learning embedding...
## Iteration 50: error is 43.428456 (50 iterations in 0.01 seconds)
## Iteration 100: error is 47.122936 (50 iterations in 0.01 seconds)
## Iteration 150: error is 46.191149 (50 iterations in 0.01 seconds)
## Iteration 200: error is 43.840578 (50 iterations in 0.01 seconds)
## Iteration 250: error is 44.242374 (50 iterations in 0.01 seconds)
## Iteration 300: error is 0.464825 (50 iterations in 0.01 seconds)
## Iteration 350: error is 0.149986 (50 iterations in 0.01 seconds)
## Iteration 400: error is 0.137901 (50 iterations in 0.01 seconds)
## Iteration 450: error is 0.132383 (50 iterations in 0.01 seconds)
## Iteration 500: error is 0.130166 (50 iterations in 0.01 seconds)
## Iteration 550: error is 0.128999 (50 iterations in 0.01 seconds)
## Iteration 600: error is 0.128274 (50 iterations in 0.01 seconds)
## Iteration 650: error is 0.127771 (50 iterations in 0.01 seconds)
## Iteration 700: error is 0.127393 (50 iterations in 0.01 seconds)
## Iteration 750: error is 0.127103 (50 iterations in 0.01 seconds)
## Iteration 800: error is 0.126862 (50 iterations in 0.01 seconds)
## Iteration 850: error is 0.126670 (50 iterations in 0.01 seconds)
## Iteration 900: error is 0.126505 (50 iterations in 0.01 seconds)
## Iteration 950: error is 0.126362 (50 iterations in 0.01 seconds)
## Iteration 1000: error is 0.126241 (50 iterations in 0.01 seconds)
## Fitting performed in 0.13 seconds.
# Put the two t-SNE coordinates, already named, in front of the iris table.
iris.info.tsne <- cbind(tsne1 = tsne$Y[, 1], tsne2 = tsne$Y[, 2], iris)

# Scatter plot of the embedding, coloured by species.
ggplot(iris.info.tsne, aes(x = tsne1, y = tsne2, colour = Species)) +
  geom_point() +
  theme_bw()

# HTP data: exact t-SNE (theta = 0) on `mets`, without the initial PCA step.
tsne <- Rtsne(mets, pca = FALSE, verbose = TRUE, perplexity = 30, theta = 0)
## Read the 415 x 174 data matrix successfully!
## Using no_dims = 2, perplexity = 30.000000, and theta = 0.000000
## Computing input similarities...
## Symmetrizing...
## Done in 0.02 seconds!
## Learning embedding...
## Iteration 50: error is 55.599908 (50 iterations in 0.05 seconds)
## Iteration 100: error is 55.052849 (50 iterations in 0.05 seconds)
## Iteration 150: error is 55.045881 (50 iterations in 0.05 seconds)
## Iteration 200: error is 55.050591 (50 iterations in 0.05 seconds)
## Iteration 250: error is 55.049018 (50 iterations in 0.05 seconds)
## Iteration 300: error is 0.753257 (50 iterations in 0.05 seconds)
## Iteration 350: error is 0.698318 (50 iterations in 0.05 seconds)
## Iteration 400: error is 0.685135 (50 iterations in 0.05 seconds)
## Iteration 450: error is 0.680085 (50 iterations in 0.05 seconds)
## Iteration 500: error is 0.678387 (50 iterations in 0.05 seconds)
## Iteration 550: error is 0.677656 (50 iterations in 0.05 seconds)
## Iteration 600: error is 0.677346 (50 iterations in 0.05 seconds)
## Iteration 650: error is 0.677217 (50 iterations in 0.05 seconds)
## Iteration 700: error is 0.677162 (50 iterations in 0.05 seconds)
## Iteration 750: error is 0.677137 (50 iterations in 0.05 seconds)
## Iteration 800: error is 0.677126 (50 iterations in 0.05 seconds)
## Iteration 850: error is 0.677121 (50 iterations in 0.05 seconds)
## Iteration 900: error is 0.677118 (50 iterations in 0.05 seconds)
## Iteration 950: error is 0.677116 (50 iterations in 0.05 seconds)
## Iteration 1000: error is 0.677116 (50 iterations in 0.05 seconds)
## Fitting performed in 0.97 seconds.
# Put the two t-SNE coordinates, already named, in front of the combined
# metabolite/sample table.
mets.info.tsne <- cbind(tsne1 = tsne$Y[, 1], tsne2 = tsne$Y[, 2], mets.info)

# Same embedding, coloured by karyotype, sex and sample source in turn.
ggplot(mets.info.tsne, aes(x = tsne1, y = tsne2, colour = Karyotype)) +
  geom_point() +
  theme_bw()

ggplot(mets.info.tsne, aes(x = tsne1, y = tsne2, colour = Sex)) +
  geom_point() +
  theme_bw()

ggplot(mets.info.tsne, aes(x = tsne1, y = tsne2, colour = Sample_source)) +
  geom_point() +
  theme_bw()

# Playing with perplexity: rerun the exact t-SNE with perplexity lowered to 5.
tsne <- Rtsne(mets, pca = FALSE, verbose = TRUE, perplexity = 5, theta = 0)
## Read the 415 x 174 data matrix successfully!
## Using no_dims = 2, perplexity = 5.000000, and theta = 0.000000
## Computing input similarities...
## Symmetrizing...
## Done in 0.02 seconds!
## Learning embedding...
## Iteration 50: error is 72.563005 (50 iterations in 0.05 seconds)
## Iteration 100: error is 69.723166 (50 iterations in 0.05 seconds)
## Iteration 150: error is 69.565391 (50 iterations in 0.05 seconds)
## Iteration 200: error is 69.657449 (50 iterations in 0.05 seconds)
## Iteration 250: error is 69.423648 (50 iterations in 0.05 seconds)
## Iteration 300: error is 1.213984 (50 iterations in 0.05 seconds)
## Iteration 350: error is 1.010454 (50 iterations in 0.05 seconds)
## Iteration 400: error is 0.945363 (50 iterations in 0.05 seconds)
## Iteration 450: error is 0.917389 (50 iterations in 0.05 seconds)
## Iteration 500: error is 0.902785 (50 iterations in 0.05 seconds)
## Iteration 550: error is 0.894018 (50 iterations in 0.05 seconds)
## Iteration 600: error is 0.888088 (50 iterations in 0.05 seconds)
## Iteration 650: error is 0.883727 (50 iterations in 0.05 seconds)
## Iteration 700: error is 0.880320 (50 iterations in 0.05 seconds)
## Iteration 750: error is 0.877578 (50 iterations in 0.05 seconds)
## Iteration 800: error is 0.875147 (50 iterations in 0.05 seconds)
## Iteration 850: error is 0.873129 (50 iterations in 0.05 seconds)
## Iteration 900: error is 0.871503 (50 iterations in 0.05 seconds)
## Iteration 950: error is 0.870060 (50 iterations in 0.05 seconds)
## Iteration 1000: error is 0.868780 (50 iterations in 0.05 seconds)
## Fitting performed in 0.99 seconds.
# Rebuild the plotting table from the perplexity = 5 embedding.
mets.info.tsne <- cbind(tsne1 = tsne$Y[, 1], tsne2 = tsne$Y[, 2], mets.info)
ggplot(mets.info.tsne, aes(x = tsne1, y = tsne2, colour = Karyotype)) +
  geom_point() +
  theme_bw()

# Rerun the exact t-SNE with perplexity raised to 100.
tsne <- Rtsne(mets, pca = FALSE, verbose = TRUE, perplexity = 100, theta = 0)
## Read the 415 x 174 data matrix successfully!
## Using no_dims = 2, perplexity = 100.000000, and theta = 0.000000
## Computing input similarities...
## Symmetrizing...
## Done in 0.02 seconds!
## Learning embedding...
## Iteration 50: error is 45.172562 (50 iterations in 0.05 seconds)
## Iteration 100: error is 45.171294 (50 iterations in 0.05 seconds)
## Iteration 150: error is 45.169719 (50 iterations in 0.05 seconds)
## Iteration 200: error is 45.163248 (50 iterations in 0.05 seconds)
## Iteration 250: error is 45.146644 (50 iterations in 0.05 seconds)
## Iteration 300: error is 0.263189 (50 iterations in 0.05 seconds)
## Iteration 350: error is 0.258365 (50 iterations in 0.05 seconds)
## Iteration 400: error is 0.257187 (50 iterations in 0.05 seconds)
## Iteration 450: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 500: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 550: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 600: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 650: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 700: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 750: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 800: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 850: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 900: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 950: error is 0.257186 (50 iterations in 0.05 seconds)
## Iteration 1000: error is 0.257186 (50 iterations in 0.05 seconds)
## Fitting performed in 0.97 seconds.
# Rebuild the plotting table from the perplexity = 100 embedding.
mets.info.tsne <- cbind(tsne1 = tsne$Y[, 1], tsne2 = tsne$Y[, 2], mets.info)
ggplot(mets.info.tsne, aes(x = tsne1, y = tsne2, colour = Karyotype)) +
  geom_point() +
  theme_bw()

UMAP: Uniform Manifold Approximation and Projection

We will use UMAP to plot the data and explore sample information

# Iris data: UMAP embedding of the four measurements.
u <- umap(iris.mat)
# Put the two layout coordinates, already named, in front of the iris table.
iris.info.umap <- cbind(UMAP1 = u$layout[, 1], UMAP2 = u$layout[, 2], iris)

ggplot(iris.info.umap, aes(x = UMAP1, y = UMAP2, colour = Species)) +
  geom_point() +
  theme_bw()

# HTP data: UMAP embedding of `mets`.
u <- umap(mets)
# Put the two layout coordinates, already named, in front of the combined
# metabolite/sample table.
mets.info.umap <- cbind(
  UMAP1 = u$layout[, 1],
  UMAP2 = u$layout[, 2],
  mets.info
)

ggplot(mets.info.umap, aes(x = UMAP1, y = UMAP2, colour = Karyotype)) +
  geom_point() +
  theme_bw()

# Print the default UMAP settings; any of them can be overridden by name in
# the umap() call.
umap.defaults

# Rerun UMAP on `mets` with n_neighbors set to 5.
u <- umap(mets, n_neighbors = 5)
mets.info.umap <- cbind(
  UMAP1 = u$layout[, 1],
  UMAP2 = u$layout[, 2],
  mets.info
)

ggplot(mets.info.umap, aes(x = UMAP1, y = UMAP2, colour = Karyotype)) +
  geom_point() +
  theme_bw()

Session Information

# Print the R version, platform and attached/loaded package versions for this
# session, so the analysis can be reproduced.
sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Denver
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] factoextra_1.0.7 RCurl_1.98-1.14  limma_3.58.1     umap_0.2.10.0   
## [5] Rtsne_0.17       ggfortify_0.4.17 ggplot2_3.5.1   
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.5      xfun_0.45         bslib_0.7.0       ggrepel_0.9.5    
##  [5] rstatix_0.7.2     lattice_0.22-6    vctrs_0.6.5       tools_4.3.3      
##  [9] bitops_1.0-7      generics_0.1.3    tibble_3.2.1      fansi_1.0.6      
## [13] highr_0.11        pkgconfig_2.0.3   Matrix_1.6-5      lifecycle_1.0.4  
## [17] compiler_4.3.3    farver_2.1.2      stringr_1.5.1     statmod_1.5.0    
## [21] munsell_0.5.1     carData_3.0-5     htmltools_0.5.8.1 sass_0.4.9       
## [25] yaml_2.3.8        pillar_1.9.0      car_3.1-2         ggpubr_0.6.0     
## [29] jquerylib_0.1.4   tidyr_1.3.1       openssl_2.2.0     cachem_1.1.0     
## [33] abind_1.4-5       nlme_3.1-165      RSpectra_0.16-1   tidyselect_1.2.1 
## [37] digest_0.6.35     stringi_1.8.4     dplyr_1.1.4       purrr_1.0.2      
## [41] labeling_0.4.3    splines_4.3.3     fastmap_1.2.0     grid_4.3.3       
## [45] colorspace_2.1-0  cli_3.6.2         magrittr_2.0.3    utf8_1.2.4       
## [49] broom_1.0.6       withr_3.0.0       scales_1.3.0      backports_1.5.0  
## [53] rmarkdown_2.27    reticulate_1.37.0 gridExtra_2.3     ggsignif_0.6.4   
## [57] askpass_1.2.0     png_0.1-8         evaluate_0.24.0   knitr_1.47       
## [61] mgcv_1.9-1        rlang_1.1.4       Rcpp_1.0.12       glue_1.7.0       
## [65] jsonlite_1.8.8    R6_2.5.1